In [1]:
%matplotlib inline
from matplotlib import pylab as plt

from metatlas import metatlas_objects
from metatlas import h5_query

import glob, os

Create an experiment


In [2]:
# Name under which this experiment is created; every file loaded later in the
# notebook is attached to this Experiment object.
experimentName = 'QExactive_Hilic_Pos_Actinobacteria_Phylogeny'
myExperiment = metatlas_objects.Experiment(name=experimentName)

Get a list of the mzML files that you uploaded and assign each file to a group


In [4]:
# Directory holding the uploaded mzML files (trailing slash required, because
# the glob pattern below is built by plain string formatting).
# An earlier example data set lived at
# '/global/homes/b/bpb/ExoMetabolomic_Example_Data/'; that assignment was
# immediately overwritten, so only the effective path is kept here.
myPath = '/project/projectdirs/metatlas/data_for_metatlas_2/20150324_LPSilva_BHedlund_chloroflexi_POS_rerun/'

# Sorted so that the prompt order (and therefore groupID) is stable between runs.
myFiles = sorted(glob.glob('%s*.mzML'%myPath))

# One group id per file, parallel to myFiles; '' means "not yet assigned".
groupID = [''] * len(myFiles)

# Interactively ask for a group id for every file. Entering "x" steps back to
# the previous file so a typo can be corrected.
i = 0
while i < len(myFiles):
    b = os.path.basename(myFiles[i])
    j = raw_input('enter group id for %s [number, "x" to go back]:'%b)
    if j == 'x':
        # Never step back past the first file: an index of -1 would wrap
        # around and prompt for (and overwrite the id of) the LAST file.
        i = max(i - 1, 0)
    else:
        groupID[i] = j
        i = i + 1

print(groupID)
# Unique group ids in sorted order; later cells attach a name to each of them.
uGroupID = sorted(set(groupID))
print(uGroupID)


enter group id for MEDIA-1.mzML [number, "x" to go back]:1
enter group id for MEDIA-2.mzML [number, "x" to go back]:1
enter group id for MEDIA-3.mzML [number, "x" to go back]:1
enter group id for MEDIA-4.mzML [number, "x" to go back]:1
enter group id for STRAIN1-1.mzML [number, "x" to go back]:2
enter group id for STRAIN1-2.mzML [number, "x" to go back]:2
enter group id for STRAIN1-3.mzML [number, "x" to go back]:2
enter group id for STRAIN1-4.mzML [number, "x" to go back]:2
enter group id for STRAIN2-1.mzML [number, "x" to go back]:3
enter group id for STRAIN2-2.mzML [number, "x" to go back]:3
enter group id for STRAIN2-3.mzML [number, "x" to go back]:3
enter group id for STRAIN2-4.mzML [number, "x" to go back]:3
enter group id for STRAIN3-1.mzML [number, "x" to go back]:4
enter group id for STRAIN3-2.mzML [number, "x" to go back]:4
enter group id for STRAIN3-3.mzML [number, "x" to go back]:4
enter group id for STRAIN3-4.mzML [number, "x" to go back]:4
enter group id for STRAIN4-1.mzML [number, "x" to go back]:5
enter group id for STRAIN4-2.mzML [number, "x" to go back]:5
enter group id for STRAIN4-3.mzML [number, "x" to go back]:5
enter group id for STRAIN4-4.mzML [number, "x" to go back]:5
['1', '1', '1', '1', '2', '2', '2', '2', '3', '3', '3', '3', '4', '4', '4', '4', '5', '5', '5', '5']
['1', '2', '3', '4', '5']

Specify a descriptive name for each group


In [5]:
# Ask for one human-readable name per unique group id. The resulting list is
# parallel to uGroupID (same order, same length).
uGroupName = [raw_input('enter group name for Group #%s: '%gid) for gid in uGroupID]


enter group name for Group #1: Media
enter group name for Group #2: Strain 1
enter group name for Group #3: Strain 2
enter group name for Group #4: Strain 3
enter group name for Group #5: Strain 4

Steps in the file description and conversion process

  • upload mzML files
  • glob to get a list of the mzML files
  • for a homogeneous set of mzML files, make a single FileSpec object with
  • metatlas_objects.FileSpec(polarity = , group = , inclusion_order = )
  • Create an experiment, e = metatlas_objects.Experiment(name = 'Test_20150722')
  • e.load_files(mzmlfiles,sp), where sp is the FileSpec object made above
  • repeat this process for each homogeneous set of files
  • Alternatively, you can specify your own FileSpec object for each file

In [6]:
# Look-up table from group id to its descriptive name (uGroupID and
# uGroupName are parallel lists).
groupNameByID = dict(zip(uGroupID, uGroupName))

# Build one FileSpec per file and load that file into the experiment.
# inclusion_order is the file's position in the sorted myFiles list.
fsList = []
for fileIndex, fileGroupID in enumerate(groupID):
    if fileGroupID not in groupNameByID:
        # A file whose id has no named group is skipped.
        continue
    # NOTE(review): polarity = 1 presumably means positive mode (the
    # experiment name contains "Pos") — confirm against metatlas_objects.
    fs = metatlas_objects.FileSpec(polarity=1,
                                   group=groupNameByID[fileGroupID],
                                   inclusion_order=fileIndex)
    fsList.append(fs)
    myExperiment.load_files([myFiles[fileIndex]], fs)

In [15]:
# Persist the experiment (with the files loaded above) via its save() method.
# NOTE(review): where and how it is stored is decided inside metatlas_objects
# and is not visible from this notebook — confirm before relying on it.
myExperiment.save()

In [10]:
# Spot-check the first loaded file: its HDF file path, group name and polarity.
firstInfo = myExperiment.finfos[0]
for attrName in ('hdf_file', 'group', 'polarity'):
    print(getattr(firstInfo, attrName))


/global/homes/b/bpb/ExoMetabolomic_Example_Data/MEDIA-1.h5
Media
1

Convert All Your Files Manually

This is typically not necessary, because the "load_files" command above has already taken care of the conversion.


In [1]:
# Disabled on purpose: manual conversion of every mzML file in myFiles,
# collecting the corresponding .h5 file names in myH5Files.
# NOTE(review): this notebook never imports `metatlas` by name (only
# `metatlas_objects` and `h5_query` are imported from it), so uncommenting
# the code as-is would raise a NameError. Add `import metatlas` first and
# confirm that `mzml_to_hdf` is exposed there.
# myH5Files = []
# for f in myFiles:
#     metatlas.mzml_to_hdf('%s'%(f))
#     myH5Files.append(f.replace('.mzML','.h5'))
#     print f

In [14]:
# Number of file entries currently attached to the experiment.
loadedFileCount = len(myExperiment.finfos)
print(loadedFileCount)


20

In [ ]: